/*
 * Copyright (c) 2022 HiSilicon (Shanghai) Technologies CO., LIMITED.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
#include <pthread.h>
#include <sys/prctl.h>
#include <math.h>
#include <assert.h>

#include "hi_common.h"
#include "hi_comm_sys.h"
#include "hi_comm_svp.h"
#include "sample_comm_svp.h"
#include "hi_comm_ive.h"
#include "sample_comm_nnie.h"
#include "sample_media_ai.h"
#include "ai_infer_process.h"

#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif
#endif /* End of #ifdef __cplusplus */

//#define USLEEP_TIME   100 // 100: usleep time, in microseconds

// #define ARRAY_SUBSCRIPT_0     0
// #define ARRAY_SUBSCRIPT_1     1
// #define ARRAY_SUBSCRIPT_2     2
// #define ARRAY_SUBSCRIPT_3     3
// #define ARRAY_SUBSCRIPT_4     4
// #define ARRAY_SUBSCRIPT_5     5
// #define ARRAY_SUBSCRIPT_6     6
// #define ARRAY_SUBSCRIPT_7     7
// #define ARRAY_SUBSCRIPT_8     8
// #define ARRAY_SUBSCRIPT_9     9

/*
 * Presumably small offsets added to a base array subscript (judging by the names only).
 * NOTE(review): none of these is referenced in the part of the file reviewed here - confirm they are still used.
 */
#define ARRAY_SUBSCRIPT_OFFSET_1    1
#define ARRAY_SUBSCRIPT_OFFSET_2    2
#define ARRAY_SUBSCRIPT_OFFSET_3    3

/* Minimum threshold value (a double literal). NOTE(review): what it is compared against is not visible here - confirm. */
#define THRESH_MIN         0.7


/*
 * yolov3 parameter
 * File-scope, zero-initialised state. Judging by the type names these hold the model,
 * the NNIE (hardware) parameters and the software post-processing parameters
 * respectively - TODO confirm against the code that fills them.
 */
static SAMPLE_SVP_NNIE_MODEL_S g_stYolov3Model = {0};
static SAMPLE_SVP_NNIE_PARAM_S g_stYolov3NnieParam = {0};
static SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S g_stYolov3SoftwareParam = {0};

//把 origImg填充到NNIE系统里
static HI_S32 FillNnieByImg(SAMPLE_SVP_NNIE_CFG_S* pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, int segId, int nodeId, const IVE_IMAGE_S *img)
{
    HI_U32 i=0, j=0, n=0;
    HI_U32 u32Height = 0, u32Width = 0, u32Chn = 0, u32Stride = 0;//u32Dim = 0;
    HI_U32 u32VarSize=0;
    HI_U8 *pu8PicAddr = NULL;
    //HI_U32 *pu32StepAddr = NULL;
    //HI_U32 u32TotalStepNum = 0;
    //HI_ULONG ulSize;

    /* get data size */
    if (SVP_BLOB_TYPE_U8 <= pstNnieParam->astSegData[segId].astSrc[nodeId].enType &&
        SVP_BLOB_TYPE_YVU422SP >= pstNnieParam->astSegData[segId].astSrc[nodeId].enType) {
        u32VarSize = sizeof(HI_U8);
    } else {
        u32VarSize = sizeof(HI_U32);
    }

    /* fill src data */
    if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->astSegData[segId].astSrc[nodeId].enType) {
        HI_ASSERT(0);// 这一段在NNIE_FillSrcData里有处理
    } else {
        u32Height = pstNnieParam->astSegData[segId].astSrc[nodeId].unShape.stWhc.u32Height;
        u32Width = pstNnieParam->astSegData[segId].astSrc[nodeId].unShape.stWhc.u32Width;
        u32Chn = pstNnieParam->astSegData[segId].astSrc[nodeId].unShape.stWhc.u32Chn;
        u32Stride = pstNnieParam->astSegData[segId].astSrc[nodeId].u32Stride;
        pu8PicAddr = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U8,
            pstNnieParam->astSegData[segId].astSrc[nodeId].u64VirAddr);

        if (SVP_BLOB_TYPE_YVU420SP == pstNnieParam->astSegData[segId].astSrc[nodeId].enType) {
            HI_ASSERT(pstNnieParam->astSegData[segId].astSrc[nodeId].u32Num == 1);
            for (n = 0; n < pstNnieParam->astSegData[segId].astSrc[nodeId].u32Num; n++) {
                // Y
                const uint8_t *srcData = (const uint8_t*)(uintptr_t)img->au64VirAddr[0];
                HI_ASSERT(srcData);
                for (j = 0; j < u32Height; j++) {
                    if (memcpy_s(pu8PicAddr, u32Width * u32VarSize, srcData, u32Width * u32VarSize) != EOK) {
                        HI_ASSERT(0);
                    }
                    pu8PicAddr += u32Stride;
                    srcData += img->au32Stride[0];
                }
                // UV
                srcData = (const uint8_t*)(uintptr_t)img->au64VirAddr[1];
                HI_ASSERT(srcData);
                for (j = 0; j < u32Height / 2; j++) { // 2: 1/2Height
                    if (memcpy_s(pu8PicAddr, u32Width * u32VarSize, srcData, u32Width * u32VarSize) != EOK) {
                        HI_ASSERT(0);
                    }
                    pu8PicAddr += u32Stride;
                    srcData += img->au32Stride[1];
                }
            }
        } else if (SVP_BLOB_TYPE_YVU422SP == pstNnieParam->astSegData[segId].astSrc[nodeId].enType) {
            HI_ASSERT(0);
        } else {
            for (n = 0; n < pstNnieParam->astSegData[segId].astSrc[nodeId].u32Num; n++) {
                for (i = 0; i < u32Chn; i++) {
                    const uint8_t *srcData = (const uint8_t*)(uintptr_t)img->au64VirAddr[i];
                    HI_ASSERT(srcData);
                    for (j = 0; j < u32Height; j++) {
                        if (memcpy_s(pu8PicAddr, u32Width * u32VarSize, srcData, u32Width * u32VarSize) != EOK) {
                            HI_ASSERT(0);
                        }
                        pu8PicAddr += u32Stride;
                        srcData += img->au32Stride[i];
                    }
                }
            }
        }

        SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[segId].astSrc[nodeId].u64PhyAddr,
            SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstNnieParam->astSegData[segId].astSrc[nodeId].u64VirAddr),
            pstNnieParam->astSegData[segId].astSrc[nodeId].u32Num*u32Chn*u32Height*u32Stride);
    }

    return HI_SUCCESS;
}

/*
 * function : NNIE Forward
 * (Copied from sample_nnie.c; expected to work as-is.)
 *
 * Runs segment pstProcSegIdx->u32SegIdx of the model on the NNIE:
 *   1. validates both segment indices against pstModel->u32NetSegNum;
 *   2. flushes the task buffer and every destination blob of the segment;
 *   3. when the input comes from a different segment, binds each source blob of the
 *      processed segment to the same-named destination blob of the input segment;
 *   4. calls HI_MPI_SVP_NNIE_Forward and, if bInstant is set, polls
 *      HI_MPI_SVP_NNIE_Query until it stops reporting a timeout;
 *   5. flushes the destination blobs again.
 *
 * Returns HI_INVALID_VALUE / HI_FAILURE / the Forward error from the check macros
 * (SAMPLE_SVP_CHECK_EXPR_RET is expected to log and return the given value when its
 * expression is true), otherwise the last value stored in s32Ret: the Query result
 * when bInstant is set, else the Forward result.
 */
static HI_S32 SAMPLE_SVP_NNIE_Forward(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S *pstInputDataIdx, SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S *pstProcSegIdx,
    HI_BOOL bInstant)
{
    HI_S32 s32Ret = HI_SUCCESS;
    HI_U32 i, j;
    HI_BOOL bFinish = HI_FALSE; /* written by HI_MPI_SVP_NNIE_Query; never read in this function */
    SVP_NNIE_HANDLE hSvpNnieHandle = 0;
    HI_U32 u32TotalStepNum = 0;

    /* Both segment indices must be below the segment count, which itself must not exceed the SDK maximum. */
    SAMPLE_SVP_CHECK_EXPR_RET(pstProcSegIdx->u32SegIdx >= pstNnieParam->pstModel->u32NetSegNum ||
        pstInputDataIdx->u32SegIdx >= pstNnieParam->pstModel->u32NetSegNum ||
        pstNnieParam->pstModel->u32NetSegNum > SVP_NNIE_MAX_NET_SEG_NUM,
        HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, pstProcSegIdx->u32SegIdx(%u) and pstInputDataIdx->u32SegIdx(%u) "
        "should be less than %u, pstNnieParam->pstModel->u32NetSegNum(%u) can't be greater than %u!\n",
        pstProcSegIdx->u32SegIdx, pstInputDataIdx->u32SegIdx, pstNnieParam->pstModel->u32NetSegNum,
        pstNnieParam->pstModel->u32NetSegNum, SVP_NNIE_MAX_NET_SEG_NUM);

    /* Flush the task buffer of the segment about to run. */
    SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64PhyAddr,
        SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
        pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64VirAddr),
        pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u32Size);

    /*
     * Flush every destination blob before the forward pass. Sequence blobs are sized
     * as (sum of per-item step counts) * stride, all others as num * chn * height * stride.
     * NOTE(review): u32TotalStepNum is not reset between blobs inside this loop, so the
     * flush length of a later SEQ_S32 blob also includes the steps of earlier ones -
     * confirm this is intended.
     */
    for (i = 0; i < pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].u32DstNum; i++) {
        if (pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].enType == SVP_BLOB_TYPE_SEQ_S32) {
            for (j = 0; j < pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num; j++) {
                u32TotalStepNum += *(SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stSeq.u64VirAddrStep) +
                    j);
            }
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr),
                u32TotalStepNum * pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
        } else {
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr),
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num *
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Chn *
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Height *
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
        }
    }

    /* set input blob according to node name */
    if (pstInputDataIdx->u32SegIdx != pstProcSegIdx->u32SegIdx) {
        for (i = 0; i < pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].u16SrcNum; i++) {
            for (j = 0; j < pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum; j++) {
                if (strncmp(pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].astDstNode[j].szName,
                    pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].astSrcNode[i].szName,
                    SVP_NNIE_NODE_NAME_LEN) == 0) {
                    /* Same node name: the blob descriptor is copied by value. */
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc[i] =
                        pstNnieParam->astSegData[pstInputDataIdx->u32SegIdx].astDst[j];
                    break;
                }
            }
            /* j reaching the destination count means the inner loop never hit its break: no match. */
            SAMPLE_SVP_CHECK_EXPR_RET((j == pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum),
                HI_FAILURE, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,can't find %d-th seg's %d-th src blob!\n",
                pstProcSegIdx->u32SegIdx, i);
        }
    }

    /* NNIE_Forward */
    s32Ret = HI_MPI_SVP_NNIE_Forward(&hSvpNnieHandle, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc,
        pstNnieParam->pstModel, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst,
        &pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx], bInstant);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_Forward failed!\n");

    if (bInstant) {
        /* Wait NNIE finish */
        /* Retry only on a query timeout; any other Query result ends the loop and stays in s32Ret. */
        while (HI_ERR_SVP_NNIE_QUERY_TIMEOUT == (s32Ret = HI_MPI_SVP_NNIE_Query(
            pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].enNnieId, hSvpNnieHandle, &bFinish, HI_TRUE))) {
            usleep(100); /* sleep 100 micro_seconds */
            SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_INFO, "HI_MPI_SVP_NNIE_Query Query timeout!\n");
        }
    }
    /* Flush the destination blobs again after the forward pass, using the same sizing as above. */
    u32TotalStepNum = 0;
    for (i = 0; i < pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].u32DstNum; i++) {
        if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].enType) {
            for (j = 0; j < pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num; j++) {
                u32TotalStepNum += *(SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stSeq.u64VirAddrStep) +
                    j);
            }
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr),
                u32TotalStepNum * pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
        } else {
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr),
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num *
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Chn *
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Height *
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
        }
    }

    return s32Ret;
}

/*
 * Compute the size in bytes of the scratch buffer needed by the YOLOv3 result
 * post-processing. (Adapted from sample_nnie.c.)
 *
 * The size is the sum of:
 *   - the largest destination blob of segment 0 (width * height * chn * sizeof(HI_U32)),
 *   - one SAMPLE_SVP_NNIE_STACK_S per candidate box,
 *   - one SAMPLE_SVP_NNIE_YOLOV3_BBOX_S per candidate box,
 * where the candidate box count is the sum over the destination blobs of
 * au32GridNumWidth[i] * au32GridNumHeight[i] * u32BboxNumEachGrid.
 *
 * The grid arrays are indexed by destination blob index, so the caller must make
 * sure astSeg[0].u16DstNum does not exceed their length.
 *
 * Returns the size, or 0 on any error (NULL argument, or an intermediate value
 * exceeding SAMPLE_SVP_NNIE_MAX_MEM). All intermediate math is done in 64 bits.
 */
static HI_U32 SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftwareParam)
{
    HI_U64 u64TotalSize, u64AssistStackSize, u64TotalBboxSize, u64DstBlobSize, u64Tmp;
    HI_U64 u64TotalBboxNum = 0;
    HI_U64 u64MaxBlobSize = 0;
    HI_U32 i;

    /* Failure is reported as 0 everywhere in this function, including for NULL arguments. */
    SAMPLE_SVP_CHECK_EXPR_RET(pstNnieParam == NULL, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, pstNnieParam can't be NULL!\n");
    SAMPLE_SVP_CHECK_EXPR_RET(pstSoftwareParam == NULL, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, pstSoftwareParam can't be NULL!\n");
    SAMPLE_SVP_CHECK_EXPR_RET(pstNnieParam->pstModel == NULL, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, pstNnieParam->pstModel can't be NULL!\n");

    for (i = 0; i < pstNnieParam->pstModel->astSeg[0].u16DstNum; i++) {
        /* Widen before multiplying so the product cannot wrap where size_t is 32 bits. */
        u64DstBlobSize = (HI_U64)pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Width *
            sizeof(HI_U32);
        SAMPLE_SVP_CHECK_EXPR_RET(u64DstBlobSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,u64DstBlobSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

        u64DstBlobSize *= pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Height;
        SAMPLE_SVP_CHECK_EXPR_RET(u64DstBlobSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,u64DstBlobSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

        u64DstBlobSize *= pstNnieParam->pstModel->astSeg[0].astDstNode[i].unShape.stWhc.u32Chn;
        SAMPLE_SVP_CHECK_EXPR_RET(u64DstBlobSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,u64DstBlobSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

        if (u64MaxBlobSize < u64DstBlobSize) {
            u64MaxBlobSize = u64DstBlobSize;
        }

        /* Candidate boxes contributed by this blob: grid cells times boxes per cell. */
        u64Tmp = (HI_U64)pstSoftwareParam->au32GridNumWidth[i] * pstSoftwareParam->au32GridNumHeight[i];
        SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error, %u-th au32GridNumWidth * au32GridNumHeight should be less than %u!\n", i, SAMPLE_SVP_NNIE_MAX_MEM);
        u64Tmp *= pstSoftwareParam->u32BboxNumEachGrid;
        SAMPLE_SVP_CHECK_EXPR_RET(u64Tmp > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,u64Tmp should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

        u64TotalBboxNum += u64Tmp;
        SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxNum > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,u64TotalBboxNum should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);
    }
    u64AssistStackSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_STACK_S);
    SAMPLE_SVP_CHECK_EXPR_RET(u64AssistStackSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64AssistStackSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    u64TotalBboxSize = u64TotalBboxNum * sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S);
    SAMPLE_SVP_CHECK_EXPR_RET(u64TotalBboxSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64TotalBboxSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    u64TotalSize = (u64MaxBlobSize + u64AssistStackSize + u64TotalBboxSize);
    SAMPLE_SVP_CHECK_EXPR_RET(u64TotalSize > SAMPLE_SVP_NNIE_MAX_MEM, 0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,u64TotalSize should be less than %u!\n", SAMPLE_SVP_NNIE_MAX_MEM);

    return (HI_U32)u64TotalSize;
}

/*
 * function : Yolov3 software para init
 *
 * Writes the fixed post-processing settings of this sample into pstSoftWareParam
 * (image size taken from source blob 0 of segment 0, grid sizes, thresholds,
 * bias table), then allocates one cached buffer laid out as
 *     [ result scratch | ROI blob | score blob | per-class ROI count blob ]
 * and points the corresponding blob descriptors at their slices.
 *
 * Returns HI_FAILURE when segment 0 does not report the expected number of blobs,
 * HI_ERR_SVP_NNIE_ILLEGAL_PARAM when the scratch size cannot be computed, the
 * allocator's error code when allocation fails, and the allocator's success value otherwise.
 */
static HI_S32 SampleSvpNnieYolov3SoftwareInit(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    /* Grid size (used for both width and height) of each report blob. */
    static const HI_U32 s_au32GridEdge[] = { 13, 26, 52 };
    /* Bias values of each report blob; presumably anchor width/height pairs - TODO confirm. */
    static const HI_U32 s_au32Bias[][6] = {
        { 116, 90, 156, 198, 373, 326 },
        { 30, 61, 62, 45, 59, 119 },
        { 10, 13, 16, 30, 33, 23 },
    };
    HI_S32 s32Status = HI_SUCCESS;
    HI_U32 u32Blob = 0;
    HI_U32 u32Col = 0;
    HI_U32 u32ClassSlots = 0;   /* configured classes plus one background class */
    HI_U32 u32RoiSlots = 0;     /* class slots times max ROIs per class */
    HI_U32 u32ScratchBytes = 0;
    HI_U32 u32RoiBytes = 0;
    HI_U32 u32ScoreBytes = 0;
    HI_U32 u32CountBytes = 0;
    HI_U32 u32AllBytes = 0;
    HI_U64 u64PhyBase = 0;
    HI_U64 u64PhyCursor = 0;
    HI_U8 *pu8VirBase = NULL;
    HI_U8 *pu8VirCursor = NULL;

    /* The values of the following parameters are related to algorithm principles.
        For details, see related algorithms. */
    pstSoftWareParam->u32OriImHeight = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Height;
    pstSoftWareParam->u32OriImWidth = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Width;
    pstSoftWareParam->u32BboxNumEachGrid = 3;
    pstSoftWareParam->u32ClassNum = 24;  /* number of output classes; change it here (was 80, now 24) */
    pstSoftWareParam->u32MaxRoiNum = 10;
    pstSoftWareParam->u32NmsThresh = (HI_U32)(0.3f * SAMPLE_SVP_NNIE_QUANT_BASE);
    pstSoftWareParam->u32ConfThresh = (HI_U32)(0.5f * SAMPLE_SVP_NNIE_QUANT_BASE);
    for (u32Blob = 0; u32Blob < sizeof(s_au32GridEdge) / sizeof(s_au32GridEdge[0]); u32Blob++) {
        pstSoftWareParam->au32GridNumHeight[u32Blob] = s_au32GridEdge[u32Blob];
        pstSoftWareParam->au32GridNumWidth[u32Blob] = s_au32GridEdge[u32Blob];
        for (u32Col = 0; u32Col < sizeof(s_au32Bias[0]) / sizeof(s_au32Bias[0][0]); u32Col++) {
            pstSoftWareParam->af32Bias[u32Blob][u32Col] = s_au32Bias[u32Blob][u32Col];
        }
    }

    /* Malloc assist buffer memory */
    u32ClassSlots = pstSoftWareParam->u32ClassNum + 1;  /* add one background class */

    SAMPLE_SVP_CHECK_EXPR_RET(SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM != pstNnieParam->pstModel->astSeg[0].u16DstNum,
        HI_FAILURE, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieParam->pstModel->astSeg[0].u16DstNum(%d) should be %d!\n",
        pstNnieParam->pstModel->astSeg[0].u16DstNum, SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM);

    u32ScratchBytes = SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(pstNnieParam, pstSoftWareParam);
    SAMPLE_SVP_CHECK_EXPR_RET(u32ScratchBytes == 0, HI_ERR_SVP_NNIE_ILLEGAL_PARAM, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf failed!\n");

    /* Each slice is rounded up by SAMPLE_SVP_NNIE_ALIGN16; the same values double as blob strides. */
    u32RoiSlots = u32ClassSlots * pstSoftWareParam->u32MaxRoiNum;
    u32RoiBytes = SAMPLE_SVP_NNIE_ALIGN16(u32RoiSlots * sizeof(HI_U32) * SAMPLE_SVP_NNIE_COORDI_NUM);
    u32ScoreBytes = SAMPLE_SVP_NNIE_ALIGN16(u32RoiSlots * sizeof(HI_U32));
    u32CountBytes = SAMPLE_SVP_NNIE_ALIGN16(u32ClassSlots * sizeof(HI_U32));
    u32AllBytes = u32ScratchBytes + u32RoiBytes + u32ScoreBytes + u32CountBytes;

    s32Status = SAMPLE_COMM_SVP_MallocCached("SAMPLE_YOLOV3_INIT", NULL, (HI_U64 *)&u64PhyBase,
        (void **)&pu8VirBase, u32AllBytes);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Status != HI_SUCCESS, s32Status, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,Malloc memory failed!\n");
    (HI_VOID)memset_s(pu8VirBase, u32AllBytes, 0, u32AllBytes);
    SAMPLE_COMM_SVP_FlushCache(u64PhyBase, (void *)pu8VirBase, u32AllBytes);

    /* set each tmp buffer addr: the scratch area sits at the start of the allocation */
    pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = u64PhyBase;
    pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = SAMPLE_SVP_NNIE_CONVERT_PTR_TO_ADDR(HI_U64, pu8VirBase);

    /* set result blob: ROI coordinates follow the scratch area */
    u64PhyCursor = u64PhyBase + u32ScratchBytes;
    pu8VirCursor = pu8VirBase + u32ScratchBytes;
    pstSoftWareParam->stDstRoi.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stDstRoi.u64PhyAddr = u64PhyCursor;
    pstSoftWareParam->stDstRoi.u64VirAddr = SAMPLE_SVP_NNIE_CONVERT_PTR_TO_ADDR(HI_U64, pu8VirCursor);
    pstSoftWareParam->stDstRoi.u32Stride = u32RoiBytes;
    pstSoftWareParam->stDstRoi.u32Num = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Width = u32RoiSlots * SAMPLE_SVP_NNIE_COORDI_NUM;

    /* scores follow the ROI blob */
    u64PhyCursor += u32RoiBytes;
    pu8VirCursor += u32RoiBytes;
    pstSoftWareParam->stDstScore.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stDstScore.u64PhyAddr = u64PhyCursor;
    pstSoftWareParam->stDstScore.u64VirAddr = SAMPLE_SVP_NNIE_CONVERT_PTR_TO_ADDR(HI_U64, pu8VirCursor);
    pstSoftWareParam->stDstScore.u32Stride = u32ScoreBytes;
    pstSoftWareParam->stDstScore.u32Num = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Width = u32RoiSlots;

    /* per-class ROI counts follow the score blob */
    u64PhyCursor += u32ScoreBytes;
    pu8VirCursor += u32ScoreBytes;
    pstSoftWareParam->stClassRoiNum.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stClassRoiNum.u64PhyAddr = u64PhyCursor;
    pstSoftWareParam->stClassRoiNum.u64VirAddr = SAMPLE_SVP_NNIE_CONVERT_PTR_TO_ADDR(HI_U64, pu8VirCursor);
    pstSoftWareParam->stClassRoiNum.u32Stride = u32CountBytes;
    pstSoftWareParam->stClassRoiNum.u32Num = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Width = u32ClassSlots;

    return s32Status;
}

/*
 * function : Yolov3 software deinit
 * (Copied from sample_nnie.c.)
 *
 * Frees the buffer recorded in stGetResultTmpBuf and clears every address that
 * pointed into it. Does nothing when either recorded address is already 0.
 * Returns HI_INVALID_VALUE for a NULL argument, HI_SUCCESS otherwise.
 */
static HI_S32 SAMPLE_SVP_NNIE_Yolov3_SoftwareDeinit(SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftWareParam)
{
    SAMPLE_SVP_CHECK_EXPR_RET(pstSoftWareParam == NULL, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, pstSoftWareParam can't be NULL!\n");

    /* Nothing was allocated, or it has been released already. */
    if ((pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr == 0) ||
        (pstSoftWareParam->stGetResultTmpBuf.u64VirAddr == 0)) {
        return HI_SUCCESS;
    }

    SAMPLE_SVP_MMZ_FREE(pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr,
        pstSoftWareParam->stGetResultTmpBuf.u64VirAddr);

    /* Clear the physical addresses first, then the virtual ones. */
    pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = 0;
    pstSoftWareParam->stDstRoi.u64PhyAddr = 0;
    pstSoftWareParam->stDstScore.u64PhyAddr = 0;
    pstSoftWareParam->stClassRoiNum.u64PhyAddr = 0;

    pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = 0;
    pstSoftWareParam->stDstRoi.u64VirAddr = 0;
    pstSoftWareParam->stDstScore.u64VirAddr = 0;
    pstSoftWareParam->stClassRoiNum.u64VirAddr = 0;

    return HI_SUCCESS;
}

/*
 * function : Yolov3 Deinit
 *
 * Releases, in this order, the hardware parameters, the software parameters and
 * the model. Each argument may be NULL, in which case that step is skipped.
 * Every step is always attempted, and each failure is traced.
 *
 * Returns HI_SUCCESS when every attempted step succeeded, otherwise the error
 * code of the first step that failed (a later success no longer hides it).
 */
static HI_S32 SampleSvpNnieYolov3Deinit(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam, SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
{
    HI_S32 s32Ret = HI_SUCCESS;     /* first failure seen, or HI_SUCCESS */
    HI_S32 s32StepRet = HI_SUCCESS; /* result of the step just executed */

    /* hardware deinit */
    if (pstNnieParam != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
        if (s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* software deinit */
    if (pstSoftWareParam != NULL) {
        s32StepRet = SAMPLE_SVP_NNIE_Yolov3_SoftwareDeinit(pstSoftWareParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SampleSvpNnieYolov3SoftwareDeinit failed!\n");
        if (s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* model deinit */
    if (pstNnieModel != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_UnloadModel(pstNnieModel);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_UnloadModel failed!\n");
        if (s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    return s32Ret;
}

/*
 * function : Yolov3 init
 *
 * Initialises the hardware parameters and then the software parameters. If either
 * step fails, both parameter sets are torn down again (the model is left alone).
 *
 * Returns the software init result on success. On failure returns the teardown's
 * error code if the teardown itself failed, otherwise HI_FAILURE.
 */
static HI_S32 SampleSvpNnieYolov3ParamInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    HI_S32 s32Status;

    /* init hardware para */
    s32Status = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg, pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, ROLLBACK, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n", s32Status);

    /* init software para */
    s32Status = SampleSvpNnieYolov3SoftwareInit(pstNnieParam, pstSoftWareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, ROLLBACK, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_SVP_NNIE_Yolov3_SoftwareInit failed!\n", s32Status);

    return s32Status;

ROLLBACK:
    /* Undo whatever was set up; NULL skips the model unload. */
    s32Status = SampleSvpNnieYolov3Deinit(pstNnieParam, pstSoftWareParam, NULL);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Status != HI_SUCCESS, s32Status, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_SVP_NNIE_Yolov3_Deinit failed!\n", s32Status);
    return HI_FAILURE;
}


/*
 * Used by the AI_CPU path.
 * Replaces each of the u32Num values in pf32Src with SAMPLE_SVP_NNIE_SIGMOID of
 * that value, in place. Always returns HI_SUCCESS.
 */
static HI_S32 SVP_NNIE_Sigmoid(HI_FLOAT *pf32Src, HI_U32 u32Num)
{
    HI_FLOAT *pf32Cur = pf32Src;
    HI_U32 u32Left = u32Num;
    HI_FLOAT f32In = 0;

    while (u32Left > 0) {
        f32In = *pf32Cur;
        *pf32Cur = SAMPLE_SVP_NNIE_SIGMOID(f32In);
        pf32Cur++;
        u32Left--;
    }
    return HI_SUCCESS;
}

/*
 * Used by the AI_CPU path.
 * Finds the largest of the u32Num values in pf32Val. On ties the lowest index wins.
 * Stores the index of the maximum in *pu32MaxValueIndex and returns its value.
 * Element 0 is always read, so the array must hold at least one value.
 */
static HI_FLOAT SVP_NNIE_GetMaxVal(HI_FLOAT *pf32Val, HI_U32 u32Num, HI_U32 *pu32MaxValueIndex)
{
    HI_U32 u32Best = 0;
    HI_U32 u32Idx = 0;

    for (u32Idx = 1; u32Idx < u32Num; u32Idx++) {
        if (pf32Val[u32Idx] > pf32Val[u32Best]) {
            u32Best = u32Idx;
        }
    }

    *pu32MaxValueIndex = u32Best;
    return pf32Val[u32Best];
}

/*
 * Used by the AI_CPU path.
 * Prototype :   SVP_NNIE_Yolov1_Argswap
 * Description : exchanges the contents of two arrays element by element
 * Input :     HI_S32*  ps32Src1           [IN/OUT] first array
 *             HI_S32*  ps32Src2           [IN/OUT] second array
 *             HI_U32   u32ArraySize       [IN] number of HI_S32 elements to exchange
 */
static void SVP_NNIE_Yolov1_Argswap(HI_S32 *ps32Src1, HI_S32 *ps32Src2, HI_U32 u32ArraySize)
{
    HI_S32 *ps32Left = ps32Src1;
    HI_S32 *ps32Right = ps32Src2;
    HI_U32 u32Remaining = u32ArraySize;
    HI_S32 s32Held = 0;

    while (u32Remaining > 0) {
        s32Held = *ps32Left;
        *ps32Left = *ps32Right;
        *ps32Right = s32Held;
        ps32Left++;
        ps32Right++;
        u32Remaining--;
    }
}

/*
 * Used by the AI_CPU path.
 * Prototype :   SVP_NNIE_Yolo_NonRecursiveArgQuickSort
 * Description : quick sort without recursion. The array holds records of
 *               u32ArraySize HI_S32 words each; records s32Low..s32High (inclusive)
 *               are reordered so that the word at offset u32ScoreIdx is in
 *               descending order. Whole records are moved together.
 * Input :     HI_S32*  ps32Array          [IN/OUT] the array to be sorted
 *             HI_S32   s32Low             [IN] index of the first record to sort
 *             HI_S32   s32High            [IN] index of the last record to sort
 *             HI_U32   u32ArraySize       [IN] number of HI_S32 words per record
 *             HI_U32   u32ScoreIdx        [IN] offset of the score word inside a record
 *             SAMPLE_SVP_NNIE_STACK_S *pstStack [IN] work buffer holding the [min, max] ranges
 *                                               still to be partitioned; the caller must make it
 *                                               large enough (required size is not checked here)
 * Return :    always HI_SUCCESS
 */
static HI_S32 SVP_NNIE_Yolo_NonRecursiveArgQuickSort(HI_S32 *ps32Array, HI_S32 s32Low, HI_S32 s32High,HI_U32 u32ArraySize, HI_U32 u32ScoreIdx, SAMPLE_SVP_NNIE_STACK_S *pstStack)
{
    HI_S32 i = s32Low;
    HI_S32 j = s32High;
    HI_S32 s32Top = 0;
    /* Note: the record at s32Low is read even when the range is empty. */
    HI_S32 s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx];
    /* Seed the explicit stack with the whole range. */
    pstStack[s32Top].s32Min = s32Low;
    pstStack[s32Top].s32Max = s32High;

    while (s32Top > -1) {
        /* Pop the next range to partition. */
        s32Low = pstStack[s32Top].s32Min;
        s32High = pstStack[s32Top].s32Max;
        i = s32Low;
        j = s32High;
        s32Top--;

        /* Pivot: the score of the first record of the range. */
        s32KeyConfidence = ps32Array[u32ArraySize * s32Low + u32ScoreIdx];

        while (i < j) {
            /* From the right, skip records whose score is smaller than the pivot. */
            while ((i < j) && (s32KeyConfidence > ps32Array[j * u32ArraySize + u32ScoreIdx])) {
                j--;
            }
            if (i < j) {
                SVP_NNIE_Yolov1_Argswap(&ps32Array[i * u32ArraySize], &ps32Array[j * u32ArraySize], u32ArraySize);
                i++;
            }
            /* From the left, skip records whose score is larger than the pivot. */
            while ((i < j) && (s32KeyConfidence < ps32Array[i * u32ArraySize + u32ScoreIdx])) {
                i++;
            }
            if (i < j) {
                SVP_NNIE_Yolov1_Argswap(&ps32Array[i * u32ArraySize], &ps32Array[j * u32ArraySize], u32ArraySize);
                j--;
            }
        }
        /* i == j is the pivot's final slot; push each side that still has more than one record. */
        if (s32Low < i - 1) {
            s32Top++;
            pstStack[s32Top].s32Min = s32Low;
            pstStack[s32Top].s32Max = i - 1;
        }
        if (s32High > i + 1) {
            s32Top++;
            pstStack[s32Top].s32Min = i + 1;
            pstStack[s32Top].s32Max = s32High;
        }
    }
    return HI_SUCCESS;
}


/*
 * Used by the AI_CPU path.
 * Prototype    : SVP_NNIE_Yolov2_Iou
 * Description  : intersection-over-union of two boxes given by their
 *                f32Xmin / f32Ymin / f32Xmax / f32Ymax corners
 * Input :     SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox1 [IN]  first bbox
 *             SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox2 [IN]  second bbox
 * Return :    0 when the overlap width or height is not positive, otherwise
 *             overlap area / (area1 + area2 - overlap area)
 */
static HI_DOUBLE SVP_NNIE_Yolov2_Iou(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox1, SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox2)
{
    HI_FLOAT f32OverlapW;
    HI_FLOAT f32OverlapH;
    HI_DOUBLE f64Overlap;
    HI_DOUBLE f64AreaFirst;
    HI_DOUBLE f64AreaSecond;

    /* Extent of the overlap along x: smaller right edge minus larger left edge. */
    f32OverlapW = SAMPLE_SVP_NNIE_MIN(pstBbox1->f32Xmax, pstBbox2->f32Xmax) -
        SAMPLE_SVP_NNIE_MAX(pstBbox1->f32Xmin, pstBbox2->f32Xmin);
    if (f32OverlapW <= 0) {
        return 0;
    }

    /* Same along y. */
    f32OverlapH = SAMPLE_SVP_NNIE_MIN(pstBbox1->f32Ymax, pstBbox2->f32Ymax) -
        SAMPLE_SVP_NNIE_MAX(pstBbox1->f32Ymin, pstBbox2->f32Ymin);
    if (f32OverlapH <= 0) {
        return 0;
    }

    f64Overlap = f32OverlapW * f32OverlapH;
    f64AreaFirst = (pstBbox1->f32Xmax - pstBbox1->f32Xmin) * (pstBbox1->f32Ymax - pstBbox1->f32Ymin);
    f64AreaSecond = (pstBbox2->f32Xmax - pstBbox2->f32Xmin) * (pstBbox2->f32Ymax - pstBbox2->f32Ymin);

    return f64Overlap / (f64AreaFirst + f64AreaSecond - f64Overlap);
}

/*
 * Used by the AI_CPU path.
 * Prototype    : SVP_NNIE_Yolov2_NonMaxSuppression
 * Description  : walks the boxes in array order; every box whose u32Mask is still 0
 *                is kept, and each later unmasked box whose IoU with it reaches the
 *                threshold gets u32Mask = 1. Stops once u32MaxRoiNum boxes are kept.
 * Input :     SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox [IN/OUT] boxes; u32Mask is updated
 *             HI_U32    u32BboxNum      [IN]  number of boxes in pstBbox
 *             HI_U32    u32NmsThresh    [IN]  NMS threshold, scaled by SAMPLE_SVP_NNIE_QUANT_BASE
 *             HI_U32    u32MaxRoiNum    [IN]  max number of boxes to keep
 * Return :    always HI_SUCCESS
 */
static HI_S32 SVP_NNIE_Yolov2_NonMaxSuppression(SAMPLE_SVP_NNIE_YOLOV2_BBOX_S *pstBbox, HI_U32 u32BboxNum,
    HI_U32 u32NmsThresh, HI_U32 u32MaxRoiNum)
{
    /* The threshold does not change during the scan, so convert it once. */
    const HI_DOUBLE f64Thresh = (HI_DOUBLE)u32NmsThresh / SAMPLE_SVP_NNIE_QUANT_BASE;
    HI_U32 u32Kept = 0;
    HI_U32 u32Ref = 0;
    HI_U32 u32Other = 0;

    for (u32Ref = 0; (u32Ref < u32BboxNum) && (u32Kept < u32MaxRoiNum); u32Ref++) {
        if (pstBbox[u32Ref].u32Mask != 0) {
            continue;
        }
        u32Kept++;

        for (u32Other = u32Ref + 1; u32Other < u32BboxNum; u32Other++) {
            if (pstBbox[u32Other].u32Mask != 0) {
                continue;
            }
            if (SVP_NNIE_Yolov2_Iou(&pstBbox[u32Ref], &pstBbox[u32Other]) >= f64Thresh) {
                pstBbox[u32Other].u32Mask = 1;
            }
        }
    }

    return HI_SUCCESS;
}


/*  // Copied from sample_svp_nnie_software.c; expected to work as-is
 * Prototype    : SVP_NNIE_Yolov3_GetResult
 * Description  : Yolov3 GetResult function
 * Input :      HI_S32    **pps32InputData     [IN]  pointer to the input data
 * HI_U32    au32GridNumWidth[]   [IN]  Grid num in width direction
 * HI_U32    au32GridNumHeight[]  [IN]  Grid num in height direction
 * HI_U32    au32Stride[]         [IN]  stride of input data
 * HI_U32    u32EachGridBbox      [IN]  Bbox num of each grid
 * HI_U32    u32ClassNum          [IN]  class num
 * HI_U32    u32SrcWidth          [IN]  input image width
 * HI_U32    u32SrcHeight         [IN]  input image height
 * HI_U32    u32MaxRoiNum         [IN]  Max output roi num
 * HI_U32    u32NmsThresh         [IN]  NMS thresh
 * HI_U32    u32ConfThresh        [IN]  conf thresh
 * HI_U32    af32Bias[][]         [IN]  bias
 * HI_U32*   pu32TmpBuf           [IN]  assist buffer
 * HI_S32    *ps32DstScores       [OUT] dst score
 * HI_S32    *ps32DstRoi          [OUT] dst roi
 * HI_S32    *ps32ClassRoiNum     [OUT] class roi num
 */
static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_U64 au64InputBlobAddr[], HI_U32 au32GridNumWidth[],
    HI_U32 au32GridNumHeight[], HI_U32 au32Stride[], HI_U32 u32EachGridBbox, HI_U32 u32ClassNum, HI_U32 u32SrcWidth,
    HI_U32 u32SrcHeight, HI_U32 u32MaxRoiNum, HI_U32 u32NmsThresh, HI_U32 u32ConfThresh,
    HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],
    HI_S32 *ps32TmpBuf, HI_S32 *ps32DstScore, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
{
    HI_S32 *ps32InputBlob = NULL;
    HI_FLOAT *pf32Permute = NULL;
    SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *pstBbox = NULL;
    HI_S32 *ps32AssistBuf = NULL;
    HI_U32 u32TotalBboxNum = 0, u32BboxNum = 0;
    HI_U32 u32ChnOffset = 0, u32HeightOffset = 0, u32Offset;
    HI_U32 u32GridXIdx, u32GridYIdx;
    HI_FLOAT f32StartX, f32StartY, f32Width, f32Height;
    HI_FLOAT f32ObjScore, f32MaxScore;
    HI_U32 u32MaxValueIndex = 0;
    HI_S32 s32ClassScore;
    HI_U32 u32ClassRoiNum;
    HI_U32 i = 0, j = 0, k = 0, c = 0, h = 0, w = 0;
    HI_U32 u32BlobSize = 0, u32MaxBlobSize = 0;

    for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
        u32BlobSize = au32GridNumWidth[i] * au32GridNumHeight[i] * sizeof(HI_U32) *
            SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM * u32EachGridBbox;
        if (u32MaxBlobSize < u32BlobSize) u32MaxBlobSize = u32BlobSize;
    }

    for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
        u32TotalBboxNum += au32GridNumWidth[i] * au32GridNumHeight[i] * u32EachGridBbox;
    }

    // get each tmpbuf addr
    SAMPLE_PRT("AI_CPU get each tmpbuf addr start.\n");
    pf32Permute = (HI_FLOAT *)ps32TmpBuf;
    pstBbox = (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *)(pf32Permute + u32MaxBlobSize / sizeof(HI_S32));
    ps32AssistBuf = (HI_S32 *)(pstBbox + u32TotalBboxNum);
    SAMPLE_PRT("AI_CPU get each tmpbuf addr over.\n");


    SAMPLE_PRT("AI_CPU decode bbox and calculate score start.\n");
    for (i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++) {
        // permute
        u32Offset = 0;
        ps32InputBlob = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, au64InputBlobAddr[i]);
        u32ChnOffset = au32GridNumHeight[i] * au32Stride[i] / sizeof(HI_S32);
        u32HeightOffset = au32Stride[i] / sizeof(HI_S32);
        for (h = 0; h < au32GridNumHeight[i]; h++) {
            for (w = 0; w < au32GridNumWidth[i]; w++) {
                for (c = 0; c < SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM * u32EachGridBbox; c++) {
                    pf32Permute[u32Offset++] = (HI_FLOAT)(ps32InputBlob[c * u32ChnOffset + h * u32HeightOffset + w]) / SAMPLE_SVP_NNIE_QUANT_BASE;
                }
            }
        }

        // decode bbox and calculate score
        for (j = 0; j < au32GridNumWidth[i] * au32GridNumHeight[i]; j++) {
            u32GridXIdx = j % au32GridNumWidth[i];
            u32GridYIdx = j / au32GridNumWidth[i];
            for (k = 0; k < u32EachGridBbox; k++) {
                u32MaxValueIndex = 0;
                u32Offset = (j * u32EachGridBbox + k) * SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM;
                // decode bbox
                f32StartX =
                    ((HI_FLOAT)u32GridXIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 0])) / au32GridNumWidth[i];
                f32StartY = ((HI_FLOAT)u32GridYIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 1])) /
                    au32GridNumHeight[i];
                if (u32SrcWidth == 0 || u32SrcHeight == 0) {
                    printf("Divisor u32SrcWidth or u32SrcHeight cannot be 0!\n");
                    return HI_FAILURE;
                }
                f32Width = (HI_FLOAT)(exp(pf32Permute[u32Offset + SAMPLE_SVP_NNIE_X_MAX_OFFSET]) *
                    af32Bias[i][2 * k]) / u32SrcWidth;
                f32Height = (HI_FLOAT)(exp(pf32Permute[u32Offset + SAMPLE_SVP_NNIE_Y_MAX_OFFSET]) *
                    af32Bias[i][2 * k + 1]) / u32SrcHeight;

                // calculate score
                (void)SVP_NNIE_Sigmoid(&pf32Permute[u32Offset+SAMPLE_SVP_NNIE_SCORE_OFFSET],(u32ClassNum + 1));
                f32ObjScore = pf32Permute[u32Offset + SAMPLE_SVP_NNIE_SCORE_OFFSET];
                f32MaxScore = SVP_NNIE_GetMaxVal(&pf32Permute[u32Offset + SAMPLE_SVP_NNIE_SUPPRESS_FLAG_OFFSET],
                    u32ClassNum, &u32MaxValueIndex);
                s32ClassScore = (HI_S32)(f32MaxScore * f32ObjScore * SAMPLE_SVP_NNIE_QUANT_BASE);

                // filter low score roi
                if ((HI_U32)s32ClassScore > u32ConfThresh) {
                    pstBbox[u32BboxNum].f32Xmin = (HI_FLOAT)(f32StartX - f32Width * 0.5f);
                    pstBbox[u32BboxNum].f32Ymin = (HI_FLOAT)(f32StartY - f32Height * 0.5f);
                    pstBbox[u32BboxNum].f32Xmax = (HI_FLOAT)(f32StartX + f32Width * 0.5f);
                    pstBbox[u32BboxNum].f32Ymax = (HI_FLOAT)(f32StartY + f32Height * 0.5f);
                    pstBbox[u32BboxNum].s32ClsScore = s32ClassScore;
                    pstBbox[u32BboxNum].u32Mask = 0;
                    pstBbox[u32BboxNum].u32ClassIdx = (HI_S32)(u32MaxValueIndex + 1);
                    u32BboxNum++;
                }
            }
        }
    }
    SAMPLE_PRT("AI_CPU decode bbox and calculate score over.\n");

    // quick sort
    SAMPLE_PRT("AI_CPU quick sort start.\n");
    if (u32BboxNum >= 1) {
        (void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32 *)pstBbox, 0, u32BboxNum - 1,
        sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S) / sizeof(HI_U32), 4, (SAMPLE_SVP_NNIE_STACK_S *)ps32AssistBuf);
    }
    SAMPLE_PRT("AI_CPU quick sort over.\n");

    // Yolov3 and Yolov2 have the same Nms operation
    // 很有意思 通过测试发现这个nms的算法实现很慢的说
    SAMPLE_PRT("AI_CPU NMS start.\n");  
    (void)SVP_NNIE_Yolov2_NonMaxSuppression(pstBbox, u32BboxNum, u32NmsThresh, u32BboxNum);
    SAMPLE_PRT("AI_CPU NMS over.\n");
    
    // Get result
    SAMPLE_PRT("AI_CPU get result start.\n");
    for (i = 1; i < u32ClassNum + 1; i++) {
        u32ClassRoiNum = 0;
        for (j = 0; j < u32BboxNum; j++) {
            if ((pstBbox[j].u32Mask == 0) && (i == pstBbox[j].u32ClassIdx) && (u32ClassRoiNum < u32MaxRoiNum)) {
                *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin * u32SrcWidth), 0);
                *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin * u32SrcHeight), 0);
                *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax * u32SrcWidth), (HI_S32)u32SrcWidth);
                *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax * u32SrcHeight), (HI_S32)u32SrcHeight);
                *(ps32DstScore++) = pstBbox[j].s32ClsScore;
                u32ClassRoiNum++;
            }
        }
        *(ps32ClassRoiNum + i) = u32ClassRoiNum;
    }
    SAMPLE_PRT("AI_CPU get result over.\n");

    return HI_SUCCESS;
}


/*
 * NOTE: copied from sample_nnie.c; it is expected to work unchanged.
 *
 * Gathers the virtual address and stride of each report blob of segment 0, resolves the
 * scratch/output buffers held by the software parameters, and forwards everything to
 * SVP_NNIE_Yolov3_GetResult. Returns that function's result; a NULL argument is handled
 * by CHECK_NULL_PTR.
 */
static HI_S32 SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S *pstSoftwareParam)
{
    HI_U64 au64BlobVirAddr[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
    HI_U32 au32BlobStride[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
    HI_U32 u32Blob = 0;

    CHECK_NULL_PTR(pstNnieParam);
    CHECK_NULL_PTR(pstSoftwareParam);

    /* One address/stride pair per report blob, taken from the outputs of segment 0. */
    while (u32Blob < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM) {
        au64BlobVirAddr[u32Blob] = pstNnieParam->astSegData[0].astDst[u32Blob].u64VirAddr;
        au32BlobStride[u32Blob] = pstNnieParam->astSegData[0].astDst[u32Blob].u32Stride;
        u32Blob++;
    }

    /* Scratch and output buffers, converted from their stored 64-bit virtual addresses. */
    HI_S32 *ps32Scratch = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stGetResultTmpBuf.u64VirAddr);
    HI_S32 *ps32ScoreOut = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstScore.u64VirAddr);
    HI_S32 *ps32RoiOut = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stDstRoi.u64VirAddr);
    HI_S32 *ps32RoiCntOut = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstSoftwareParam->stClassRoiNum.u64VirAddr);

    return SVP_NNIE_Yolov3_GetResult(au64BlobVirAddr,
        pstSoftwareParam->au32GridNumWidth,
        pstSoftwareParam->au32GridNumHeight,
        au32BlobStride,
        pstSoftwareParam->u32BboxNumEachGrid,
        pstSoftwareParam->u32ClassNum,
        pstSoftwareParam->u32OriImWidth,
        pstSoftwareParam->u32OriImHeight,
        pstSoftwareParam->u32MaxRoiNum,
        pstSoftwareParam->u32NmsThresh,
        pstSoftwareParam->u32ConfThresh,
        pstSoftwareParam->af32Bias,
        ps32Scratch, ps32ScoreOut, ps32RoiOut, ps32RoiCntOut);
}

/*
 * function : print detection result
 *
 * pstDstScore          [IN] blob holding one score per box, in SAMPLE_SVP_NNIE_QUANT_BASE units
 * pstDstRoi            [IN] blob holding SAMPLE_SVP_NNIE_COORDI_NUM coordinates per box
 * pstClassRoiNum       [IN] blob holding the box count of each class; its width is the
 *                           number of entries walked here
 * f32PrintResultThresh [IN] boxes scoring below this value are not printed
 *
 * Scores and boxes are read as being stored class after class, so the running sum of the
 * class counts is the index of the first box of the next class. Entry 0 only contributes
 * to that running sum; printing starts at entry 1. Always returns HI_SUCCESS.
 */
static HI_S32 SAMPLE_SVP_NNIE_Detection_PrintResult(SVP_BLOB_S *pstDstScore, SVP_BLOB_S *pstDstRoi,
    SVP_BLOB_S *pstClassRoiNum, HI_FLOAT f32PrintResultThresh)
{
    HI_U32 i = 0, j = 0;
    HI_U32 u32RoiNumBias = 0, u32ScoreBias = 0, u32BboxBias = 0;
    HI_FLOAT f32Score = 0.0f;
    HI_S32 *ps32Score = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstScore->u64VirAddr);
    HI_S32 *ps32Roi = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstRoi->u64VirAddr);
    HI_S32 *ps32ClassRoiNum = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstClassRoiNum->u64VirAddr);
    HI_U32 u32ClassNum = pstClassRoiNum->unShape.stWhc.u32Width;
    HI_S32 s32XMin = 0, s32YMin = 0, s32XMax = 0, s32YMax = 0;

    u32RoiNumBias += ps32ClassRoiNum[0];
    for (i = 1; i < u32ClassNum; i++) {
        u32ScoreBias = u32RoiNumBias;
        u32BboxBias = u32RoiNumBias * SAMPLE_SVP_NNIE_COORDI_NUM;
        /*
         * Print the class banner only when the class has boxes and its first score passes the
         * threshold. The count is tested first so that the score slot is never read for an
         * empty class, where it would belong to another class or lie past the written data.
         */
        if (ps32ClassRoiNum[i] != 0 &&
            (HI_FLOAT)ps32Score[u32ScoreBias] / SAMPLE_SVP_NNIE_QUANT_BASE >= f32PrintResultThresh) {
            SAMPLE_SVP_TRACE_INFO("==== The %uth class box info====\n", i);
        }
        for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++) {
            f32Score = (HI_FLOAT)ps32Score[u32ScoreBias + j] / SAMPLE_SVP_NNIE_QUANT_BASE;
            /* The first score below the threshold ends the class (assumes scores are stored in
               descending order within a class - TODO confirm against the producer). */
            if (f32Score < f32PrintResultThresh) {
                break;
            }
            s32XMin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM];
            s32YMin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + 1]; /* second coordinate of the box */
            s32XMax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + 2]; /* third coordinate of the box */
            s32YMax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + 3]; /* fourth coordinate of the box */
            SAMPLE_SVP_TRACE_INFO("%d %d %d %d %f\n", s32XMin, s32YMin, s32XMax, s32YMax, f32Score);
        }
        u32RoiNumBias += ps32ClassRoiNum[i];
    }
    return HI_SUCCESS;
}

/*
 * function : fetch result
 *
 * Copies the detections scoring at least THRESH_MIN from the result blobs into resBuf.
 *
 * pstDstScore    [IN]  blob holding one score per box, in SAMPLE_SVP_NNIE_QUANT_BASE units
 * pstDstRoi      [IN]  blob holding SAMPLE_SVP_NNIE_COORDI_NUM coordinates per box
 * pstClassRoiNum [IN]  blob holding the box count of each class
 * resBuf         [OUT] receives the accepted detections; cleared before use
 * resSize        [IN]  capacity of resBuf in elements, must be > 0
 * resLen         [OUT] number of elements stored in resBuf
 *
 * A box whose min corner is not strictly below its max corner is logged and skipped.
 */
static void Yolo3FetchRes(SVP_BLOB_S *pstDstScore, SVP_BLOB_S *pstDstRoi, SVP_BLOB_S *pstClassRoiNum,
    DetectObjInfo resBuf[], int resSize, int* resLen)
{
    HI_U32 i;
    HI_U32 j;
    HI_U32 u32RoiNumBias = 0;
    HI_U32 u32ScoreBias = 0;
    HI_U32 u32BboxBias = 0;
    HI_FLOAT f32Score = 0.0f;
    HI_S32* ps32Score = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstScore->u64VirAddr);
    HI_S32* ps32Roi = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstRoi->u64VirAddr);
    HI_S32* ps32ClassRoiNum = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstClassRoiNum->u64VirAddr);
    HI_U32 u32ClassNum = pstClassRoiNum->unShape.stWhc.u32Width;
    int resId = 0;

    SAMPLE_PRT("Total Classes ---> u32ClassNum: (%u)\n", u32ClassNum);
    /* The class-count blob must have exactly 25 entries
       (presumably 24 object classes plus entry 0 - TODO confirm against the model). */
    HI_ASSERT(u32ClassNum == 25);
    HI_ASSERT(resSize > 0);

    *resLen = 0;
    memset_s(resBuf, resSize * sizeof(resBuf[0]), 0x00, resSize * sizeof(resBuf[0]));

    /* Entry 0 only shifts the start of the data; classes are reported from entry 1. */
    u32RoiNumBias += ps32ClassRoiNum[0];
    for (i = 1; i < u32ClassNum; i++) {
        /* Boxes are stored class after class, so the running count is this class's first box. */
        u32ScoreBias = u32RoiNumBias;
        u32BboxBias = u32RoiNumBias * SAMPLE_SVP_NNIE_COORDI_NUM;

        for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++) {
            f32Score = (HI_FLOAT)ps32Score[u32ScoreBias + j] / SAMPLE_SVP_NNIE_QUANT_BASE;
            /* The first score below the threshold ends the class (assumes scores are stored in
               descending order within a class - TODO confirm against the producer). */
            if (f32Score < THRESH_MIN) {
                SAMPLE_PRT("f32Score:%.2f\n", f32Score);
                break;
            }
            if (resId >= resSize) {
                SAMPLE_PRT("yolo3 resBuf full\n");
                break;
            }

            /* Fill the next free slot; it is only committed (resId++) if the box is valid. */
            resBuf[resId].cls = (int)i;
            resBuf[resId].score = f32Score;

            RectBox *box = &resBuf[resId].box;
            box->xmin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM];
            box->ymin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + ARRAY_SUBSCRIPT_OFFSET_1];
            box->xmax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + ARRAY_SUBSCRIPT_OFFSET_2];
            box->ymax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + ARRAY_SUBSCRIPT_OFFSET_3];
            if (box->xmin >= box->xmax || box->ymin >= box->ymax) {
                SAMPLE_PRT("yolo1_orig: {%d, %d, %d, %d}, %f, discard for coord ERR\n",
                    box->xmin, box->ymin, box->xmax, box->ymax, f32Score);
            } else {
                resId++;
            }
        }
        u32RoiNumBias += ps32ClassRoiNum[i];
    }

    *resLen = resId;
}

/*
 * function : create the yolo3 model from a model file
 *
 * model     [OUT] receives the newly allocated configuration on success, NULL on failure;
 *                 release it with Yolo3Destory
 * modelFile [IN]  path of the model file handed to SAMPLE_COMM_SVP_NNIE_LoadModel
 *
 * Loads the model into the file-scope g_stYolov3Model and initialises g_stYolov3NnieParam
 * and g_stYolov3SoftwareParam from it.
 *
 * Returns 0 on success, -1 on failure. On failure nothing allocated here is left behind.
 */
int Yolo3Create(SAMPLE_SVP_NNIE_CFG_S **model, const char* modelFile)
{
    SAMPLE_SVP_NNIE_CFG_S *self = NULL;  /* configuration handed back through *model */
    HI_U32 u32PicNum = 1;  /* batch size the converted model was built for */
    HI_S32 s32Ret;

    self = (SAMPLE_SVP_NNIE_CFG_S*)malloc(sizeof(*self));
    if (self == NULL) {
        /* Nothing has been initialised yet, so there is nothing to tear down. */
        SAMPLE_PRT("Yolov3Create failed! malloc failed\n");
        *model = NULL;
        return -1;
    }
    memset_s(self, sizeof(*self), 0x00, sizeof(*self));

    // Set configuration parameter
    self->pszPic = NULL;
    self->u32MaxInputNum = u32PicNum; // max input image num in each batch
    self->u32MaxRoiNum = 0; // original note: 300
    self->aenNnieCoreId[0] = SVP_NNIE_ID_0; // set NNIE core

    // Yolov3 Load model
    SAMPLE_SVP_TRACE_INFO("Yolov3 Load model!\n");
    s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel((char*)modelFile, &g_stYolov3Model);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLOV3_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, SAMPLE_COMM_SVP_NNIE_LoadModel failed!\n");

    /* Yolov3 parameter initialization */
    /* Yolov3 software parameters are set in SampleSvpNnieYolov3SoftwareInit,
      if user has changed net struct, please make sure the parameter settings in
      SampleSvpNnieYolov3SoftwareInit function are correct */
    SAMPLE_SVP_TRACE_INFO("Yolov3 parameter initialization!\n");
    g_stYolov3NnieParam.pstModel = &g_stYolov3Model.stModel;
    s32Ret = SampleSvpNnieYolov3ParamInit(self, &g_stYolov3NnieParam, &g_stYolov3SoftwareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLOV3_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SampleSvpNnieYolov3ParamInit failed!\n");

    // model important info
    SAMPLE_PRT("model.base={ type=%x, frmNum=%u, chnNum=%u, w=%u, h=%u, stride=%u }\n",
        g_stYolov3NnieParam.astSegData[0].astSrc[0].enType,
        g_stYolov3NnieParam.astSegData[0].astSrc[0].u32Num,
        g_stYolov3NnieParam.astSegData[0].astSrc[0].unShape.stWhc.u32Chn,
        g_stYolov3NnieParam.astSegData[0].astSrc[0].unShape.stWhc.u32Width,
        g_stYolov3NnieParam.astSegData[0].astSrc[0].unShape.stWhc.u32Height,
        g_stYolov3NnieParam.astSegData[0].astSrc[0].u32Stride);
    SAMPLE_PRT("model.soft={ class=%u, ori.w=%u, ori.h=%u, bnum=%u, \
        grid.w=%u, grid.h=%u, nmsThresh=%u, confThresh=%u, u32MaxRoiNum=%u }\n",
        g_stYolov3SoftwareParam.u32ClassNum,
        g_stYolov3SoftwareParam.u32OriImWidth,
        g_stYolov3SoftwareParam.u32OriImHeight,
        g_stYolov3SoftwareParam.u32BboxNumEachGrid,
        g_stYolov3SoftwareParam.au32GridNumWidth[0],
        g_stYolov3SoftwareParam.au32GridNumHeight[0],
        g_stYolov3SoftwareParam.u32NmsThresh,
        g_stYolov3SoftwareParam.u32ConfThresh,
        g_stYolov3SoftwareParam.u32MaxRoiNum);

    *model = self;
    return 0;

YOLOV3_FAIL_0:
    SAMPLE_PRT("Yolov3Create failed! SampleSvpNnieYolov3Deinit\n");
    SampleSvpNnieYolov3Deinit(&g_stYolov3NnieParam, &g_stYolov3SoftwareParam, &g_stYolov3Model);
    /* The caller only receives NULL, so the configuration must be released here. */
    free(self);
    *model = NULL;
    return -1;
}

/*
 * function : destroy the yolo3 model
 *
 * self [IN] configuration pointer to release with free()
 *
 * Deinitialises the file-scope NNIE parameters, software parameters and model, then frees self.
 */
void Yolo3Destory(SAMPLE_SVP_NNIE_CFG_S *self)
{
    SampleSvpNnieYolov3Deinit(&g_stYolov3NnieParam, &g_stYolov3SoftwareParam, &g_stYolov3Model);
    // The SAMPLE_COMM_SVP_CheckSysExit() call is disabled here.
    free(self);
}

/*
 * function : run yolo3 inference on one yuv image
 *
 * self    [IN]  model configuration; its pszPic field is reset to NULL
 * img     [IN]  image handed to FillNnieByImg as the network input
 * resBuf  [OUT] receives the detections that pass the score threshold
 * resSize [IN]  capacity of resBuf in elements
 * resLen  [OUT] number of detections stored in resBuf; set to 0 when the call fails
 *
 * Steps: fill the NNIE input from img, run segment 0 forward, post-process the outputs
 * into score/ROI/class-count blobs, print them, then copy them into resBuf.
 *
 * Returns 0 on success, -1 if filling, forwarding or post-processing fails.
 */
int Yolo3CalImg(SAMPLE_SVP_NNIE_CFG_S* self,
    const IVE_IMAGE_S *img, DetectObjInfo resBuf[], int resSize, int* resLen)
{
    SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0};
    SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0};
    /* Declared up front so that no goto below jumps over an initialised declaration. */
    HI_FLOAT f32PrintResultThresh = 0.7f;
    HI_S32 s32Ret;

    /* The input comes from img, not from a picture file. */
    self->pszPic = NULL;
    stInputDataIdx.u32SegIdx = 0;
    stInputDataIdx.u32NodeIdx = 0;

    SAMPLE_SVP_TRACE_INFO("Yolov3 inference start!\n");

    /* Fill src data */
    SAMPLE_PRT("Fill NNIE by Image Start!\n");
    s32Ret = FillNnieByImg(self, &g_stYolov3NnieParam, stInputDataIdx.u32SegIdx, stInputDataIdx.u32NodeIdx, img);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Ret != HI_SUCCESS, YOLOV3_FAIL_CAL, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, fill NNIE by Img failed!\n");
    SAMPLE_PRT("Fill NNIE by Image Over!\n");

    // NNIE process(process the 0-th segment)
    SAMPLE_PRT("Yolov3 NNIE Process Start!\n");
    stProcSegIdx.u32SegIdx = 0;
    s32Ret = SAMPLE_SVP_NNIE_Forward(&g_stYolov3NnieParam, &stInputDataIdx, &stProcSegIdx, HI_TRUE);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLOV3_FAIL_CAL, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Forward failed!\n");
    SAMPLE_PRT("Yolov3 NNIE Process Over!\n");

    /* Software process */
    /* if user has changed net struct, please make sure SAMPLE_SVP_NNIE_Yolov3_GetResult
     function input datas are correct */
    SAMPLE_PRT("Yolov3 AI_CPU Get_Result Start!\n");
    s32Ret = SAMPLE_SVP_NNIE_Yolov3_GetResult(&g_stYolov3NnieParam, &g_stYolov3SoftwareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLOV3_FAIL_CAL, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Yolov3_GetResult failed!\n");
    SAMPLE_PRT("Yolov3 AI_CPU Get_Result Over!\n");

    /* Log every detection at or above the print threshold. */
    SAMPLE_PRT("Yolov3 result:\n");
    (void)SAMPLE_SVP_NNIE_Detection_PrintResult(&g_stYolov3SoftwareParam.stDstScore,
        &g_stYolov3SoftwareParam.stDstRoi, &g_stYolov3SoftwareParam.stClassRoiNum, f32PrintResultThresh);
    SAMPLE_PRT("Yolov3 Print Result Over!\n");

    /* Hand the detections back to the caller. */
    Yolo3FetchRes(&g_stYolov3SoftwareParam.stDstScore, &g_stYolov3SoftwareParam.stDstRoi,
        &g_stYolov3SoftwareParam.stClassRoiNum, resBuf, resSize, resLen);
    SAMPLE_PRT("Yolov3 Fetch Result Over!\n");

    SAMPLE_PRT("Yolov3 inference Over!\n");
    return 0;

YOLOV3_FAIL_CAL:
    SAMPLE_PRT("Yolov3 Fail CAL!\n");
    /* Leave the output count well defined so the caller never reads a stale value. */
    if (resLen != NULL) {
        *resLen = 0;
    }
    return -1;
}


#ifdef __cplusplus
#if __cplusplus
}
#endif
#endif /* End of #ifdef __cplusplus */
